import sys
import os
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.svm import LinearSVC
# Put the NOVA checkout named by $NOVA_HOME on the module search path so the
# project imports that follow (``utils``, ``manuscript``) can be resolved.
nova_home = os.getenv('NOVA_HOME')
print('NOVA_HOME is at', nova_home)
if nova_home:
    # Index 1 leaves sys.path[0] (the directory of the running notebook/script)
    # ahead of the project checkout.
    sys.path.insert(1, nova_home)
else:
    # os.getenv returns None when the variable is unset. Inserting None (or an
    # empty string) into sys.path would not make the project importable and the
    # imports below would fail with an unrelated-looking error, so say why here.
    print(
        'WARNING: NOVA_HOME is not set; sys.path was left unchanged and the '
        'NOVA project imports below are expected to fail.',
        file=sys.stderr,
    )
%load_ext autoreload
%autoreload 2
from utils import *
from manuscript.plot_config import PlotConfig
NOVA_HOME is at /home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA NOVA_HOME: /home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA
# Marker display names: each entry of PlotConfig.COLOR_MAPPINGS_MARKERS is a
# mapping that carries (at least) an "alias" field, which is all we need here.
cfg = PlotConfig()
color_mappings = cfg.COLOR_MAPPINGS_MARKERS
# build label_map: marker key -> alias
label_map = {k: v["alias"] for k, v in color_mappings.items()}

# Keyword arguments shared by every run_baseline_model call below.
common = dict(
    batches=[1, 2, 3],
    classifier_class=LinearSVC,
    # `dual` is pinned to True, the value LinearSVC currently falls back to.
    # Leaving it unset emits a FutureWarning on every fit, and from
    # scikit-learn 1.5 the default becomes 'auto', which may select a different
    # solver and shift the reported metrics.
    # NOTE(review): the recorded Cytoself run emits a liblinear
    # ConvergenceWarning with max_iter=1000, i.e. that baseline is scored from
    # a non-converged fit. Consider a larger max_iter (or dual=False, which
    # scikit-learn recommends when n_samples > n_features) and re-running both
    # models with the same settings -- confirm before changing reported numbers.
    classifier_kwargs={
        "C": 1.0,
        "max_iter": 1000,
        "random_state": 42,
        "dual": True,
    },
    train_each_as_singleton=True,  # Train on one batch, test on rest
    label_map=label_map,
)
# Where the fine-tuned ViT model's embeddings live and which per-batch dataset
# config to use; "{batch}" in config_fmt is presumably substituted with each
# batch number by run_baseline_model -- verify against its implementation.
dataset_config = dict(
    path_to_embeddings="/home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen",
    multiplexed=False,
    config_fmt="NIH_UMAP1_DatasetConfig_B{batch}",
    config_dir="manuscript/manuscript_figures_data_config",
)

# Kept as a bare trailing expression so that, in a notebook cell, the value
# returned by run_baseline_model is displayed.
run_baseline_model(dataset_config=dataset_config, **common)
2025-09-23 14:34:03 INFO: [NIH_UMAP1_DatasetConfig_B1] Init (log path: /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen/logs/230925_143403_748265_467779_galavir_sysdashboardsysjupyter.log; JOBID: 467779 Username: galavir) JOBNAME: sysdashboardsysjupyter 2025-09-23 14:34:03 INFO: [NIH_UMAP1_DatasetConfig_B1] NOVA_HOME=/home/projects/hornsteinlab/Collaboration/NOVA_GAL/NOVA, NOVA_DATA_HOME=/home/projects/hornsteinlab/Collaboration/NOVA/input 2025-09-23 14:34:03 INFO: [load_embeddings] multiplex=False 2025-09-23 14:34:03 INFO: [load_embeddings] experiment_type = NIH 2025-09-23 14:34:03 INFO: [load_embeddings] input_folders = ['batch1'] 2025-09-23 14:34:03 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen
Loading all batches...
2025-09-23 14:34:07 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-09-23 14:34:08 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-09-23 14:34:09 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-09-23 14:34:09 INFO: [load_embeddings] embeddings shape: (115587, 192) 2025-09-23 14:34:09 INFO: [load_embeddings] labels shape: (115587,) 2025-09-23 14:34:09 INFO: [load_embeddings] example label: DAPI_WT_Untreated 2025-09-23 14:34:09 INFO: [load_embeddings] paths shape: (115587,) 2025-09-23 14:34:09 INFO: [load_embeddings] multiplex=False 2025-09-23 14:34:09 INFO: [load_embeddings] experiment_type = NIH 2025-09-23 14:34:09 INFO: [load_embeddings] input_folders = ['batch2'] 2025-09-23 14:34:09 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-09-23 14:34:12 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-09-23 14:34:12 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-09-23 14:34:13 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-09-23 14:34:13 INFO: [load_embeddings] embeddings shape: (94059, 192) 2025-09-23 14:34:13 INFO: [load_embeddings] labels shape: (94059,) 2025-09-23 14:34:13 INFO: [load_embeddings] example label: DCP1A_WT_Untreated 2025-09-23 14:34:13 INFO: [load_embeddings] paths shape: (94059,) 2025-09-23 14:34:13 INFO: [load_embeddings] multiplex=False 2025-09-23 14:34:13 INFO: [load_embeddings] experiment_type = NIH 2025-09-23 14:34:13 INFO: [load_embeddings] input_folders = ['batch3'] 2025-09-23 14:34:13 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/finetunedModel_MLPHead_acrossBatches_B56789_80pct_frozen 2025-09-23 14:34:16 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-09-23 14:34:17 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-09-23 14:34:17 INFO: 
[embeddings_utils._filter] conditions = ['Untreated'] 2025-09-23 14:34:17 INFO: [load_embeddings] embeddings shape: (87130, 192) 2025-09-23 14:34:17 INFO: [load_embeddings] labels shape: (87130,) 2025-09-23 14:34:17 INFO: [load_embeddings] example label: TUJ1_WT_Untreated 2025-09-23 14:34:17 INFO: [load_embeddings] paths shape: (87130,)
Batches loaded. Training on Batches: [1], Testing on: [2, 3]. === Fold (test=[2, 3]) === Train: (115587, 192) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25] Test: (181189, 192) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25] Nucleus: 30428 Microtubule: 22601 Lysosome: 3067 P-Bodies: 2364 TDP-43 granules: 2534 Paraspeckles: 2622 Presynapse: 2454 Coated vesicles: 2439 Peroxisome: 2505 PURA granules: 2712 Stress granules: 2842 Actin Cytoskeleton: 2219 NEMO granules: 2935 Autophagosomes: 2651 PML bodies: 2297 Golgi: 2371 ER: 3056 Transport machinery: 2622 FMRP granules: 2913 Nucleolus: 2709 MOM: 2363 hnRNP complex: 2728 Mitochondria: 2728 TIA1 granules: 2712 Postsynapse: 2101 ANXA11 granules: 2614
/home/projects/hornsteinlab/galavir/.conda/envs/nova/lib/python3.9/site-packages/sklearn/svm/_classes.py:32: FutureWarning: The default value of `dual` will change from `True` to `'auto'` in 1.5. Set the value of `dual` explicitly to suppress the warning. warnings.warn(
=== Evaluation Metrics ===
Label Accuracy Sensitivity Specificity PPV NPV F1 Correct/Total Accuracy
ANXA11 granules 0.996335 0.932796 0.997760 0.903242 0.998492 0.917781 NaN
Actin Cytoskeleton 0.997687 0.913210 0.999678 0.985256 0.997959 0.947866 NaN
Autophagosomes 0.988885 0.563735 0.996377 0.732809 0.992342 0.637248 NaN
Coated vesicles 0.994867 0.811572 0.999621 0.982294 0.995135 0.888809 NaN
ER 0.998162 0.964861 0.998993 0.959856 0.999123 0.962352 NaN
FMRP granules 0.991324 0.792927 0.995917 0.818067 0.995209 0.805301 NaN
Golgi 0.999023 0.980964 0.999457 0.977518 0.999542 0.979238 NaN
Lysosome 0.997715 0.952215 0.998896 0.957227 0.998760 0.954715 NaN
MOM 0.998697 0.976356 0.999242 0.969167 0.999423 0.972748 NaN
Microtubule 0.993035 0.991257 0.993461 0.973184 0.997897 0.982137 NaN
Mitochondria 0.998852 0.978065 0.999361 0.974017 0.999463 0.976037 NaN
NEMO granules 0.997218 0.966719 0.997991 0.924194 0.999156 0.944978 NaN
Nucleolus 0.999299 0.994666 0.999412 0.976326 0.999870 0.985411 NaN
Nucleus 0.999349 1.000000 0.999118 0.997518 1.000000 0.998757 NaN
P-Bodies 0.991931 0.818246 0.996073 0.832449 0.995668 0.825287 NaN
PML bodies 0.993460 0.727695 0.998605 0.909884 0.994749 0.808655 NaN
PURA granules 0.997285 0.987066 0.997526 0.903926 0.999694 0.943668 NaN
Paraspeckles 0.997842 0.945460 0.998924 0.947786 0.998873 0.946621 NaN
Peroxisome 0.997997 0.937643 0.999334 0.968964 0.998619 0.953046 NaN
Postsynapse 0.988658 0.805603 0.992300 0.675445 0.996118 0.734804 NaN
Presynapse 0.994464 0.973545 0.994910 0.802967 0.999434 0.880067 NaN
Stress granules 0.998438 0.976554 0.999014 0.963089 0.999382 0.969775 NaN
TDP-43 granules 0.997003 0.932361 0.998377 0.924270 0.998562 0.928298 NaN
TIA1 granules 0.995684 0.951249 0.996729 0.872467 0.998851 0.910156 NaN
Transport machinery 0.995452 0.855670 0.998981 0.954977 0.996366 0.902600 NaN
hnRNP complex 0.997798 0.921727 0.999661 0.985192 0.998086 0.952404 NaN
Macro Average 0.996018 0.909699 0.997912 0.918157 0.997953 0.911875 0.948231
Plotting confusion matrix with 26 classes, fig_size=46.8, font_size=23
Training on Batches: [2], Testing on: [1, 3]. === Fold (test=[1, 3]) === Train: (94059, 192) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25] Test: (202717, 192) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25] P-Bodies: 2319 Autophagosomes: 1654 ANXA11 granules: 2123 Microtubule: 18531 Nucleus: 24823 ER: 2079 NEMO granules: 2360 Presynapse: 1923 MOM: 2114 Transport machinery: 2104 Actin Cytoskeleton: 2019 Peroxisome: 2074 Golgi: 2110 Mitochondria: 2236 Nucleolus: 2227 Coated vesicles: 2536 FMRP granules: 2608 Postsynapse: 1631 PML bodies: 1818 Stress granules: 2265 TDP-43 granules: 1934 Paraspeckles: 1916 PURA granules: 2090 TIA1 granules: 2086 hnRNP complex: 2236 Lysosome: 2243
/home/projects/hornsteinlab/galavir/.conda/envs/nova/lib/python3.9/site-packages/sklearn/svm/_classes.py:32: FutureWarning: The default value of `dual` will change from `True` to `'auto'` in 1.5. Set the value of `dual` explicitly to suppress the warning. warnings.warn(
=== Evaluation Metrics ===
Label Accuracy Sensitivity Specificity PPV NPV F1 Correct/Total Accuracy
ANXA11 granules 0.996971 0.925851 0.998573 0.935915 0.998331 0.930856 NaN
Actin Cytoskeleton 0.997820 0.973004 0.998366 0.929211 0.999404 0.950603 NaN
Autophagosomes 0.988575 0.546312 0.997784 0.836977 0.990621 0.661106 NaN
Coated vesicles 0.994367 0.778720 0.999243 0.958803 0.995017 0.859429 NaN
ER 0.997977 0.964180 0.998900 0.959904 0.999022 0.962037 NaN
FMRP granules 0.991402 0.732804 0.997146 0.850817 0.994083 0.787413 NaN
Golgi 0.998782 0.985607 0.999082 0.960717 0.999672 0.973003 NaN
Lysosome 0.997716 0.954318 0.998905 0.959821 0.998748 0.957062 NaN
MOM 0.999063 0.985536 0.999374 0.973166 0.999667 0.979312 NaN
Microtubule 0.993745 0.992654 0.994005 0.975329 0.998239 0.983916 NaN
Mitochondria 0.999176 0.975534 0.999752 0.989693 0.999404 0.982562 NaN
NEMO granules 0.996063 0.988124 0.996266 0.871204 0.999695 0.925988 NaN
Nucleolus 0.999329 0.986859 0.999631 0.984804 0.999682 0.985830 NaN
Nucleus 0.999640 1.000000 0.999512 0.998625 1.000000 0.999312 NaN
P-Bodies 0.992857 0.873623 0.995420 0.803883 0.997279 0.837303 NaN
PML bodies 0.993326 0.788265 0.997369 0.855245 0.995831 0.820390 NaN
PURA granules 0.998372 0.981447 0.998782 0.951303 0.999550 0.966140 NaN
Paraspeckles 0.998219 0.961125 0.999037 0.956532 0.999143 0.958823 NaN
Peroxisome 0.997760 0.938761 0.999057 0.956308 0.998654 0.947454 NaN
Postsynapse 0.988807 0.774725 0.993121 0.694115 0.995450 0.732208 NaN
Presynapse 0.995319 0.915797 0.997046 0.870754 0.998168 0.892708 NaN
Stress granules 0.998722 0.977038 0.999296 0.973499 0.999392 0.975265 NaN
TDP-43 granules 0.997479 0.942334 0.998694 0.940827 0.998729 0.941580 NaN
TIA1 granules 0.995733 0.936534 0.997166 0.888845 0.998462 0.912067 NaN
Transport machinery 0.995521 0.918072 0.997471 0.901420 0.997936 0.909670 NaN
hnRNP complex 0.998343 0.960813 0.999257 0.969253 0.999045 0.965015 NaN
Macro Average 0.996196 0.913771 0.998010 0.921037 0.998047 0.915271 0.950542
Plotting confusion matrix with 26 classes, fig_size=46.8, font_size=23
Training on Batches: [3], Testing on: [1, 2]. === Fold (test=[1, 2]) === Train: (87130, 192) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25] Test: (209646, 192) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25] Microtubule: 16469 Transport machinery: 2358 ER: 2332 Nucleus: 22599 PML bodies: 1623 TIA1 granules: 2078 Paraspeckles: 1751 Coated vesicles: 2044 PURA granules: 2085 FMRP granules: 1492 Postsynapse: 1903 Lysosome: 2340 Peroxisome: 1855 TDP-43 granules: 1836 ANXA11 granules: 1850 NEMO granules: 2117 Actin Cytoskeleton: 2152 Presynapse: 1857 P-Bodies: 1901 MOM: 2200 hnRNP complex: 2095 Nucleolus: 2085 Autophagosomes: 1484 Stress granules: 2384 Golgi: 2145 Mitochondria: 2095
/home/projects/hornsteinlab/galavir/.conda/envs/nova/lib/python3.9/site-packages/sklearn/svm/_classes.py:32: FutureWarning: The default value of `dual` will change from `True` to `'auto'` in 1.5. Set the value of `dual` explicitly to suppress the warning. warnings.warn(
=== Evaluation Metrics ===
Label Accuracy Sensitivity Specificity PPV NPV F1 Correct/Total Accuracy
ANXA11 granules 0.996995 0.963057 0.997780 0.909308 0.999145 0.935411 NaN
Actin Cytoskeleton 0.997615 0.971213 0.998160 0.915888 0.999405 0.942739 NaN
Autophagosomes 0.978340 0.300116 0.992559 0.458156 0.985432 0.362667 NaN
Coated vesicles 0.997601 0.964623 0.998402 0.936208 0.999139 0.950203 NaN
ER 0.996427 0.964752 0.997223 0.897139 0.999113 0.929718 NaN
FMRP granules 0.981688 0.425648 0.996727 0.778661 0.984654 0.550416 NaN
Golgi 0.998898 0.987280 0.999152 0.962157 0.999722 0.974557 NaN
Lysosome 0.997057 0.933710 0.998703 0.949263 0.998278 0.941422 NaN
MOM 0.998979 0.988832 0.999201 0.964278 0.999756 0.976401 NaN
Microtubule 0.997529 0.991564 0.998985 0.995825 0.997943 0.993690 NaN
Mitochondria 0.999175 0.985898 0.999497 0.979388 0.999658 0.982632 NaN
NEMO granules 0.996742 0.901794 0.999202 0.966991 0.997460 0.933255 NaN
Nucleolus 0.999523 0.985616 0.999858 0.994074 0.999653 0.989827 NaN
Nucleus 0.999948 1.000000 0.999929 0.999801 1.000000 0.999900 NaN
P-Bodies 0.987031 0.862268 0.989881 0.660668 0.996831 0.748124 NaN
PML bodies 0.992712 0.842041 0.995728 0.797836 0.996834 0.819343 NaN
PURA granules 0.998631 0.954602 0.999663 0.985171 0.998937 0.969646 NaN
Paraspeckles 0.998178 0.958131 0.999064 0.957709 0.999074 0.957920 NaN
Peroxisome 0.996847 0.963748 0.997586 0.899144 0.999189 0.930326 NaN
Postsynapse 0.990241 0.562433 0.997994 0.835589 0.992116 0.672325 NaN
Presynapse 0.995049 0.820425 0.998772 0.934426 0.996181 0.873723 NaN
Stress granules 0.996623 0.986098 0.996886 0.887714 0.999652 0.934323 NaN
TDP-43 granules 0.996027 0.948075 0.997071 0.875749 0.998867 0.910478 NaN
TIA1 granules 0.996356 0.897249 0.998677 0.940778 0.997596 0.918498 NaN
Transport machinery 0.982876 0.918959 0.984350 0.575232 0.998105 0.707559 NaN
hnRNP complex 0.997381 0.912369 0.999443 0.975447 0.997878 0.942854 NaN
Macro Average 0.994787 0.884250 0.997327 0.885869 0.997331 0.878768 0.932233
Plotting confusion matrix with 26 classes, fig_size=46.8, font_size=23
=== Evaluation Metrics (from aggregated confusion) ===
Label Accuracy Sensitivity Specificity PPV NPV F1 Correct/Total Accuracy
ANXA11 granules 0.996785 0.941324 0.998044 0.916150 0.998667 0.928566 NaN
Actin Cytoskeleton 0.997707 0.952895 0.998693 0.941331 0.998963 0.947078 NaN
Autophagosomes 0.985054 0.459492 0.995510 0.670616 0.989314 0.545333 NaN
Coated vesicles 0.995662 0.855321 0.999061 0.956657 0.996504 0.903155 NaN
ER 0.997486 0.964577 0.998336 0.937337 0.999085 0.950762 NaN
FMRP granules 0.987947 0.629474 0.996623 0.818561 0.991082 0.711672 NaN
Golgi 0.998896 0.984682 0.999221 0.966521 0.999650 0.975517 NaN
Lysosome 0.997483 0.946536 0.998831 0.955403 0.998586 0.950949 NaN
MOM 0.998922 0.983675 0.999273 0.968874 0.999624 0.976219 NaN
Microtubule 0.994865 0.991840 0.995593 0.981885 0.998030 0.986838 NaN
Mitochondria 0.999077 0.979955 0.999543 0.981206 0.999512 0.980580 NaN
NEMO granules 0.996656 0.950823 0.997830 0.918181 0.998739 0.934217 NaN
Nucleolus 0.999388 0.988819 0.999645 0.985381 0.999729 0.987097 NaN
Nucleus 0.999660 1.000000 0.999539 0.998704 1.000000 0.999352 NaN
P-Bodies 0.990516 0.851838 0.993663 0.753072 0.996628 0.799416 NaN
PML bodies 0.993150 0.789387 0.997167 0.846003 0.995853 0.816715 NaN
PURA granules 0.998132 0.973791 0.998710 0.947179 0.999377 0.960301 NaN
Paraspeckles 0.998089 0.955478 0.999012 0.954416 0.999036 0.954946 NaN
Peroxisome 0.997510 0.947311 0.998622 0.938414 0.998832 0.942842 NaN
Postsynapse 0.989268 0.714108 0.994594 0.718828 0.994467 0.716460 NaN
Presynapse 0.994963 0.899824 0.997004 0.865664 0.997849 0.882413 NaN
Stress granules 0.997894 0.979976 0.998358 0.939227 0.999481 0.959169 NaN
TDP-43 granules 0.996821 0.941386 0.998024 0.911808 0.998727 0.926361 NaN
TIA1 granules 0.995938 0.927283 0.997566 0.900374 0.998274 0.913631 NaN
Transport machinery 0.991034 0.898715 0.993291 0.766125 0.997513 0.827140 NaN
hnRNP complex 0.997837 0.931789 0.999446 0.976180 0.998340 0.953468 NaN
Macro Average 0.995644 0.901550 0.997738 0.904388 0.997764 0.901161 0.94337
=== Average Metrics Across Folds ===
Label Accuracy Sensitivity Specificity PPV NPV F1 Correct/Total Accuracy
ANXA11 granules 0.996767 0.940568 0.998037 0.916155 0.998656 0.928016 NaN
Actin Cytoskeleton 0.997707 0.952476 0.998735 0.943452 0.998923 0.947070 NaN
Autophagosomes 0.985266 0.470054 0.995573 0.675980 0.989465 0.553674 NaN
Coated vesicles 0.995611 0.851638 0.999089 0.959101 0.996431 0.899480 NaN
ER 0.997522 0.964597 0.998372 0.938966 0.999086 0.951369 NaN
FMRP granules 0.988138 0.650459 0.996597 0.815849 0.991315 0.714377 NaN
Golgi 0.998901 0.984617 0.999230 0.966797 0.999645 0.975599 NaN
Lysosome 0.997496 0.946748 0.998835 0.955437 0.998596 0.951066 NaN
MOM 0.998913 0.983575 0.999272 0.968870 0.999615 0.976154 NaN
Microtubule 0.994770 0.991825 0.995484 0.981446 0.998026 0.986581 NaN
Mitochondria 0.999068 0.979832 0.999537 0.981033 0.999508 0.980411 NaN
NEMO granules 0.996675 0.952212 0.997820 0.920796 0.998770 0.934740 NaN
Nucleolus 0.999384 0.989047 0.999634 0.985068 0.999735 0.987023 NaN
Nucleus 0.999645 1.000000 0.999520 0.998648 1.000000 0.999323 NaN
P-Bodies 0.990606 0.851379 0.993791 0.765667 0.996593 0.803571 NaN
PML bodies 0.993166 0.786001 0.997234 0.854321 0.995805 0.816129 NaN
PURA granules 0.998096 0.974372 0.998657 0.946800 0.999394 0.959818 NaN
Paraspeckles 0.998080 0.954905 0.999008 0.954009 0.999030 0.954455 NaN
Peroxisome 0.997535 0.946717 0.998659 0.941472 0.998821 0.943609 NaN
Postsynapse 0.989235 0.714254 0.994472 0.735050 0.994561 0.713113 NaN
Presynapse 0.994944 0.903256 0.996910 0.869383 0.997928 0.882166 NaN
Stress granules 0.997928 0.979896 0.998399 0.941434 0.999476 0.959787 NaN
TDP-43 granules 0.996836 0.940923 0.998047 0.913616 0.998720 0.926785 NaN
TIA1 granules 0.995924 0.928344 0.997524 0.900697 0.998303 0.913574 NaN
Transport machinery 0.991283 0.897567 0.993601 0.810543 0.997469 0.839943 NaN
hnRNP complex 0.997841 0.931636 0.999454 0.976631 0.998336 0.953424 NaN
Macro Average 0.995667 0.902573 0.997750 0.908355 0.997777 0.901971 0.943669
Plotting confusion matrix with 26 classes, fig_size=46.8, font_size=23
{'Accuracy': 0.9956668237232452,
'Sensitivity': 0.9025730353738828,
'Specificity': 0.9977496228381414,
'PPV': 0.9083546275272392,
'NPV': 0.9977771596059583,
'F1': 0.9019713749781145,
'Correct/Total Accuracy': 0.9436687084021894}
# Same evaluation as above, but reading embeddings from the cytoself model's
# output folder; the dataset-config naming and directory are unchanged.
Cytoself_dataset_config = dict(
    path_to_embeddings="/home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/",
    multiplexed=False,
    config_fmt="NIH_UMAP1_DatasetConfig_B{batch}",
    config_dir="manuscript/manuscript_figures_data_config",
)

# Bare trailing expression: in a notebook cell the returned value is displayed.
run_baseline_model(dataset_config=Cytoself_dataset_config, **common)
2025-09-23 14:35:45 INFO: [load_embeddings] multiplex=False 2025-09-23 14:35:45 INFO: [load_embeddings] experiment_type = NIH 2025-09-23 14:35:45 INFO: [load_embeddings] input_folders = ['batch1'] 2025-09-23 14:35:45 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/
Loading all batches...
2025-09-23 14:36:00 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-09-23 14:36:03 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-09-23 14:36:04 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-09-23 14:36:05 INFO: [load_embeddings] embeddings shape: (115590, 2048) 2025-09-23 14:36:05 INFO: [load_embeddings] labels shape: (115590,) 2025-09-23 14:36:05 INFO: [load_embeddings] example label: TIA1_WT_Untreated 2025-09-23 14:36:05 INFO: [load_embeddings] paths shape: (115590,) 2025-09-23 14:36:05 INFO: [load_embeddings] multiplex=False 2025-09-23 14:36:05 INFO: [load_embeddings] experiment_type = NIH 2025-09-23 14:36:05 INFO: [load_embeddings] input_folders = ['batch2'] 2025-09-23 14:36:05 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/ 2025-09-23 14:36:16 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-09-23 14:36:18 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-09-23 14:36:19 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-09-23 14:36:20 INFO: [load_embeddings] embeddings shape: (94059, 2048) 2025-09-23 14:36:20 INFO: [load_embeddings] labels shape: (94059,) 2025-09-23 14:36:20 INFO: [load_embeddings] example label: TIA1_WT_Untreated 2025-09-23 14:36:20 INFO: [load_embeddings] paths shape: (94059,) 2025-09-23 14:36:20 INFO: [load_embeddings] multiplex=False 2025-09-23 14:36:20 INFO: [load_embeddings] experiment_type = NIH 2025-09-23 14:36:20 INFO: [load_embeddings] input_folders = ['batch3'] 2025-09-23 14:36:20 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/cytoself_model/ 2025-09-23 14:36:33 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-09-23 14:36:35 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-09-23 14:36:36 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-09-23 14:36:36 INFO: [load_embeddings] 
embeddings shape: (87130, 2048) 2025-09-23 14:36:36 INFO: [load_embeddings] labels shape: (87130,) 2025-09-23 14:36:36 INFO: [load_embeddings] example label: TIA1_WT_Untreated 2025-09-23 14:36:36 INFO: [load_embeddings] paths shape: (87130,)
Batches loaded. Training on Batches: [1], Testing on: [2, 3]. === Fold (test=[2, 3]) === Train: (115590, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25] Test: (181189, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25] TIA1 granules: 2712 ANXA11 granules: 2614 Coated vesicles: 2439 ER: 3056 Nucleus: 30429 P-Bodies: 2364 FMRP granules: 2913 hnRNP complex: 2728 Stress granules: 2842 Golgi: 2371 Transport machinery: 2622 Lysosome: 3067 Mitochondria: 2728 Nucleolus: 2709 NEMO granules: 2935 Paraspeckles: 2623 Peroxisome: 2505 PML bodies: 2297 Postsynapse: 2101 PURA granules: 2712 Actin Cytoskeleton: 2219 Presynapse: 2454 Autophagosomes: 2651 TDP-43 granules: 2535 MOM: 2363 Microtubule: 22601
/home/projects/hornsteinlab/galavir/.conda/envs/nova/lib/python3.9/site-packages/sklearn/svm/_classes.py:32: FutureWarning: The default value of `dual` will change from `True` to `'auto'` in 1.5. Set the value of `dual` explicitly to suppress the warning. warnings.warn( /home/projects/hornsteinlab/galavir/.conda/envs/nova/lib/python3.9/site-packages/sklearn/svm/_base.py:1242: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn(
=== Evaluation Metrics ===
Label Accuracy Sensitivity Specificity PPV NPV F1 Correct/Total Accuracy
ANXA11 granules 0.992030 0.837402 0.995497 0.806545 0.996352 0.821684 NaN
Actin Cytoskeleton 0.996683 0.889954 0.999198 0.963155 0.997412 0.925109 NaN
Autophagosomes 0.982057 0.420331 0.991957 0.479462 0.989806 0.447954 NaN
Coated vesicles 0.993785 0.782751 0.999258 0.964747 0.994394 0.864272 NaN
ER 0.997312 0.950805 0.998473 0.939516 0.998772 0.945127 NaN
FMRP granules 0.983498 0.754390 0.988802 0.609338 0.994282 0.674150 NaN
Golgi 0.996578 0.920094 0.998417 0.933254 0.998079 0.926627 NaN
Lysosome 0.996197 0.947414 0.997463 0.906472 0.998634 0.926491 NaN
MOM 0.997770 0.949235 0.998954 0.956776 0.998762 0.952990 NaN
Microtubule 0.994829 0.993000 0.995266 0.980478 0.998319 0.986699 NaN
Mitochondria 0.998228 0.961903 0.999118 0.963906 0.999067 0.962903 NaN
NEMO granules 0.994983 0.890328 0.997635 0.905086 0.997223 0.897647 NaN
Nucleolus 0.998620 0.983766 0.998982 0.959294 0.999604 0.971376 NaN
Nucleus 0.999305 0.999768 0.999140 0.997580 0.999918 0.998673 NaN
P-Bodies 0.987554 0.680806 0.994869 0.759852 0.992407 0.718160 NaN
PML bodies 0.988570 0.601569 0.996062 0.747292 0.992316 0.666559 NaN
PURA granules 0.995469 0.971976 0.996023 0.852163 0.999337 0.908135 NaN
Paraspeckles 0.996280 0.901282 0.998242 0.913741 0.997961 0.907468 NaN
Peroxisome 0.996037 0.859252 0.999069 0.953403 0.996887 0.903882 NaN
Postsynapse 0.984199 0.609791 0.991647 0.592196 0.992233 0.600864 NaN
Presynapse 0.990711 0.892593 0.992802 0.725435 0.997700 0.800380 NaN
Stress granules 0.996208 0.957195 0.997236 0.901175 0.998871 0.928340 NaN
TDP-43 granules 0.994812 0.885411 0.997137 0.867915 0.997564 0.876576 NaN
TIA1 granules 0.992069 0.870077 0.994939 0.801726 0.996938 0.834504 NaN
Transport machinery 0.988520 0.681533 0.996271 0.821892 0.991994 0.745160 NaN
hnRNP complex 0.995695 0.877857 0.998581 0.938071 0.997014 0.906966 NaN
Macro Average 0.993385 0.848865 0.996578 0.855403 0.996609 0.849950 0.914001
Plotting confusion matrix with 26 classes, fig_size=46.8, font_size=23
Training on Batches: [2], Testing on: [1, 3]. === Fold (test=[1, 3]) === Train: (94059, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25] Test: (202720, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25] TIA1 granules: 2086 ANXA11 granules: 2123 Coated vesicles: 2536 ER: 2079 Nucleus: 24823 P-Bodies: 2319 FMRP granules: 2608 hnRNP complex: 2236 Stress granules: 2265 Golgi: 2110 Transport machinery: 2104 Lysosome: 2243 Mitochondria: 2236 Nucleolus: 2227 NEMO granules: 2360 Paraspeckles: 1916 Peroxisome: 2074 PML bodies: 1818 Postsynapse: 1631 PURA granules: 2090 Actin Cytoskeleton: 2019 Presynapse: 1923 Autophagosomes: 1654 TDP-43 granules: 1934 MOM: 2114 Microtubule: 18531
/home/projects/hornsteinlab/galavir/.conda/envs/nova/lib/python3.9/site-packages/sklearn/svm/_classes.py:32: FutureWarning: The default value of `dual` will change from `True` to `'auto'` in 1.5. Set the value of `dual` explicitly to suppress the warning. warnings.warn( /home/projects/hornsteinlab/galavir/.conda/envs/nova/lib/python3.9/site-packages/sklearn/svm/_base.py:1242: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn(
=== Evaluation Metrics ===
Label Accuracy Sensitivity Specificity PPV NPV F1 Correct/Total Accuracy
ANXA11 granules 0.992847 0.831541 0.996479 0.841723 0.996208 0.836601 NaN
Actin Cytoskeleton 0.996902 0.950126 0.997933 0.910147 0.998900 0.929707 NaN
Autophagosomes 0.980663 0.448609 0.991742 0.530758 0.988556 0.486239 NaN
Coated vesicles 0.994061 0.813295 0.998149 0.908547 0.995788 0.858286 NaN
ER 0.997731 0.967706 0.998551 0.948000 0.999118 0.957752 NaN
FMRP granules 0.981802 0.579796 0.990732 0.581512 0.990667 0.580652 NaN
Golgi 0.995309 0.916740 0.997099 0.878049 0.998101 0.896978 NaN
Lysosome 0.996014 0.932125 0.997765 0.919540 0.998139 0.925790 NaN
MOM 0.997869 0.962744 0.998678 0.943716 0.999142 0.953135 NaN
Microtubule 0.995116 0.992577 0.995723 0.982270 0.998223 0.987397 NaN
Mitochondria 0.998214 0.961227 0.999116 0.963625 0.999055 0.962425 NaN
NEMO granules 0.992413 0.958036 0.993292 0.784950 0.998921 0.862899 NaN
Nucleolus 0.997928 0.965999 0.998702 0.947422 0.999176 0.956621 NaN
Nucleus 0.999477 0.999227 0.999566 0.998775 0.999726 0.999001 NaN
P-Bodies 0.987564 0.695193 0.993847 0.708313 0.993452 0.701692 NaN
PML bodies 0.987826 0.638520 0.994713 0.704277 0.992885 0.669789 NaN
PURA granules 0.997425 0.948718 0.998606 0.942822 0.998757 0.945761 NaN
Paraspeckles 0.996295 0.911980 0.998155 0.915959 0.998059 0.913965 NaN
Peroxisome 0.995422 0.851147 0.998593 0.930075 0.996734 0.888862 NaN
Postsynapse 0.983618 0.557193 0.992210 0.590368 0.991088 0.573301 NaN
Presynapse 0.991180 0.792160 0.995504 0.792895 0.995484 0.792527 NaN
Stress granules 0.997109 0.951588 0.998314 0.937241 0.998718 0.944360 NaN
TDP-43 granules 0.994771 0.881720 0.997262 0.876507 0.997393 0.879106 NaN
TIA1 granules 0.992852 0.880167 0.995579 0.828128 0.997096 0.853355 NaN
Transport machinery 0.987885 0.712851 0.994811 0.775787 0.992783 0.742989 NaN
hnRNP complex 0.997139 0.930334 0.998767 0.948425 0.998303 0.939292 NaN
Macro Average 0.993286 0.847359 0.996534 0.849609 0.996557 0.847634 0.912717
Plotting confusion matrix with 26 classes, fig_size=46.8, font_size=23
Training on Batches: [3], Testing on: [1, 2]. === Fold (test=[1, 2]) === Train: (87130, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25] Test: (209649, 2048) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25] TIA1 granules: 2078 ANXA11 granules: 1850 Coated vesicles: 2044 ER: 2332 Nucleus: 22599 P-Bodies: 1901 FMRP granules: 1492 hnRNP complex: 2095 Stress granules: 2384 Golgi: 2145 Transport machinery: 2358 Lysosome: 2340 Mitochondria: 2095 Nucleolus: 2085 NEMO granules: 2117 Paraspeckles: 1751 Peroxisome: 1855 PML bodies: 1623 Postsynapse: 1903 PURA granules: 2085 Actin Cytoskeleton: 2152 Presynapse: 1857 Autophagosomes: 1484 TDP-43 granules: 1836 MOM: 2200 Microtubule: 16469
/home/projects/hornsteinlab/galavir/.conda/envs/nova/lib/python3.9/site-packages/sklearn/svm/_classes.py:32: FutureWarning: The default value of `dual` will change from `True` to `'auto'` in 1.5. Set the value of `dual` explicitly to suppress the warning. warnings.warn( /home/projects/hornsteinlab/galavir/.conda/envs/nova/lib/python3.9/site-packages/sklearn/svm/_base.py:1242: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations. warnings.warn(
=== Evaluation Metrics ===
Label Accuracy Sensitivity Specificity PPV NPV F1 Correct/Total Accuracy
ANXA11 granules 0.992053 0.851805 0.995296 0.807161 0.996570 0.828882 NaN
Actin Cytoskeleton 0.995640 0.947853 0.996626 0.852866 0.998922 0.897854 NaN
Autophagosomes 0.973747 0.291289 0.988054 0.338279 0.985185 0.313030 NaN
Coated vesicles 0.994963 0.875980 0.997855 0.908484 0.996988 0.891936 NaN
ER 0.997310 0.966894 0.998073 0.926479 0.999168 0.946255 NaN
FMRP granules 0.974042 0.270965 0.993058 0.513560 0.980531 0.354755 NaN
Golgi 0.995015 0.937068 0.996281 0.846231 0.998622 0.889336 NaN
Lysosome 0.996256 0.919586 0.998248 0.931692 0.997911 0.925599 NaN
MOM 0.998455 0.960688 0.999279 0.966734 0.999142 0.963702 NaN
Microtubule 0.996995 0.988671 0.999027 0.995983 0.997240 0.992314 NaN
Mitochondria 0.998626 0.984287 0.998974 0.958791 0.999619 0.971372 NaN
NEMO granules 0.991925 0.790557 0.997142 0.877568 0.994587 0.831793 NaN
Nucleolus 0.997987 0.975891 0.998520 0.940820 0.999418 0.958035 NaN
Nucleus 0.999227 0.999023 0.999301 0.998047 0.999650 0.998535 NaN
P-Bodies 0.980568 0.708947 0.986773 0.550489 0.993306 0.619750 NaN
PML bodies 0.982475 0.678979 0.988552 0.542840 0.993540 0.603325 NaN
PURA granules 0.997262 0.906706 0.999385 0.971875 0.997816 0.938160 NaN
Paraspeckles 0.996342 0.934347 0.997713 0.900425 0.998546 0.917072 NaN
Peroxisome 0.995340 0.936886 0.996645 0.861792 0.998588 0.897771 NaN
Postsynapse 0.979990 0.396034 0.990574 0.432290 0.989070 0.413369 NaN
Presynapse 0.989826 0.664382 0.996765 0.814110 0.992872 0.731664 NaN
Stress granules 0.996699 0.934600 0.998250 0.930228 0.998367 0.932409 NaN
TDP-43 granules 0.993274 0.853435 0.996320 0.834756 0.996806 0.843992 NaN
TIA1 granules 0.992030 0.773030 0.997159 0.864367 0.994697 0.816151 NaN
Transport machinery 0.979976 0.759204 0.985068 0.539711 0.994394 0.630913 NaN
hnRNP complex 0.995297 0.845286 0.998935 0.950612 0.996258 0.894860 NaN
Macro Average 0.991589 0.813554 0.995687 0.809854 0.995685 0.807801 0.89066
Plotting confusion matrix with 26 classes, fig_size=46.8, font_size=23
=== Evaluation Metrics (from aggregated confusion) ===
Label Accuracy Sensitivity Specificity PPV NPV F1 Correct/Total Accuracy
ANXA11 granules 0.992318 0.840595 0.995761 0.818236 0.996379 0.829265 NaN
Actin Cytoskeleton 0.996390 0.929734 0.997856 0.905157 0.998453 0.917281 NaN
Autophagosomes 0.978646 0.382449 0.990507 0.444891 0.987749 0.411314 NaN
Coated vesicles 0.994295 0.825545 0.998383 0.925196 0.995785 0.872534 NaN
ER 0.997454 0.962435 0.998358 0.938002 0.999030 0.950061 NaN
FMRP granules 0.979579 0.509269 0.990962 0.576932 0.988157 0.540993 NaN
Golgi 0.995593 0.924691 0.997212 0.883362 0.998278 0.903554 NaN
Lysosome 0.996155 0.932353 0.997844 0.919611 0.998209 0.925938 NaN
MOM 0.998046 0.957691 0.998974 0.955544 0.999026 0.956616 NaN
Microtubule 0.995692 0.991311 0.996747 0.986558 0.997905 0.988929 NaN
Mitochondria 0.998364 0.969542 0.999066 0.961979 0.999258 0.965746 NaN
NEMO granules 0.993025 0.877766 0.995977 0.848240 0.996866 0.862750 NaN
Nucleolus 0.998160 0.974932 0.998723 0.948718 0.999392 0.961647 NaN
Nucleus 0.999336 0.999319 0.999342 0.998152 0.999758 0.998736 NaN
P-Bodies 0.985090 0.695474 0.991661 0.654236 0.993081 0.674225 NaN
PML bodies 0.986163 0.641948 0.992949 0.642228 0.992941 0.642088 NaN
PURA granules 0.996770 0.941121 0.998092 0.921387 0.998600 0.931150 NaN
Paraspeckles 0.996307 0.916932 0.998026 0.909557 0.998201 0.913229 NaN
Peroxisome 0.995581 0.884131 0.998051 0.909505 0.997434 0.896639 NaN
Postsynapse 0.982514 0.520319 0.991460 0.541109 0.990723 0.530511 NaN
Presynapse 0.990559 0.777751 0.995125 0.773903 0.995231 0.775822 NaN
Stress granules 0.996689 0.947537 0.997962 0.923317 0.998641 0.935270 NaN
TDP-43 granules 0.994255 0.872799 0.996891 0.859038 0.997238 0.865864 NaN
TIA1 granules 0.992323 0.839732 0.995942 0.830732 0.996198 0.835208 NaN
Transport machinery 0.985285 0.718450 0.991810 0.682056 0.993106 0.699780 NaN
hnRNP complex 0.996048 0.884332 0.998770 0.945977 0.997186 0.914116 NaN
Macro Average 0.992717 0.835314 0.996248 0.834755 0.996263 0.834587 0.905318
=== Average Metrics Across Folds ===
Label Accuracy Sensitivity Specificity PPV NPV F1 Correct/Total Accuracy
ANXA11 granules 0.992310 0.840250 0.995757 0.818477 0.996376 0.829056 NaN
Actin Cytoskeleton 0.996408 0.929311 0.997919 0.908723 0.998411 0.917557 NaN
Autophagosomes 0.978822 0.386743 0.990584 0.449500 0.987849 0.415741 NaN
Coated vesicles 0.994270 0.824009 0.998421 0.927260 0.995723 0.871498 NaN
ER 0.997451 0.961802 0.998366 0.937998 0.999019 0.949711 NaN
FMRP granules 0.979781 0.535050 0.990864 0.568137 0.988493 0.536519 NaN
Golgi 0.995634 0.924634 0.997266 0.885845 0.998267 0.904314 NaN
Lysosome 0.996156 0.933042 0.997825 0.919235 0.998228 0.925960 NaN
MOM 0.998031 0.957556 0.998970 0.955742 0.999015 0.956609 NaN
Microtubule 0.995647 0.991416 0.996672 0.986244 0.997927 0.988803 NaN
Mitochondria 0.998356 0.969139 0.999069 0.962107 0.999247 0.965567 NaN
NEMO granules 0.993107 0.879641 0.996023 0.855868 0.996910 0.864113 NaN
Nucleolus 0.998179 0.975219 0.998735 0.949179 0.999399 0.962011 NaN
Nucleus 0.999336 0.999339 0.999336 0.998134 0.999765 0.998736 NaN
P-Bodies 0.985229 0.694982 0.991830 0.672885 0.993055 0.679867 NaN
PML bodies 0.986290 0.639690 0.993109 0.664803 0.992914 0.646558 NaN
PURA granules 0.996719 0.942467 0.998004 0.922287 0.998637 0.930685 NaN
Paraspeckles 0.996306 0.915869 0.998037 0.910041 0.998189 0.912835 NaN
Peroxisome 0.995600 0.882428 0.998103 0.915090 0.997403 0.896839 NaN
Postsynapse 0.982602 0.521006 0.991477 0.538285 0.990797 0.529178 NaN
Presynapse 0.990572 0.783045 0.995024 0.777480 0.995352 0.774857 NaN
Stress granules 0.996672 0.947794 0.997933 0.922881 0.998652 0.935036 NaN
TDP-43 granules 0.994286 0.873522 0.996906 0.859726 0.997254 0.866558 NaN
TIA1 granules 0.992317 0.841091 0.995892 0.831407 0.996243 0.834670 NaN
Transport machinery 0.985460 0.717863 0.992050 0.712463 0.993057 0.706354 NaN
hnRNP complex 0.996044 0.884492 0.998761 0.945703 0.997191 0.913706 NaN
Macro Average 0.992753 0.836592 0.996267 0.838288 0.996284 0.835128 0.905793
Plotting confusion matrix with 26 classes, fig_size=46.8, font_size=23
{'Accuracy': 0.992753296238766,
'Sensitivity': 0.8365922757073809,
'Specificity': 0.9962666530158648,
'PPV': 0.8382883760889336,
'NPV': 0.9962837269271777,
'F1': 0.8351283651750085,
'Correct/Total Accuracy': 0.9057928511039575}
# Dataset settings for the baseline run on the pretrained model's embeddings.
# "{batch}" in config_fmt is a placeholder left unformatted here
# (presumably filled in per batch by run_baseline_model -- TODO confirm).
pretrained_dataset_config = dict(
    path_to_embeddings="/home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/pretrained_model",
    multiplexed=False,
    config_fmt="NIH_UMAP1_DatasetConfig_B{batch}",
    config_dir="manuscript/manuscript_figures_data_config",
)

# Reuse the shared keyword arguments collected in `common` (defined earlier in
# the notebook). The call stays the final expression of the cell so that its
# return value is displayed by the notebook.
run_baseline_model(dataset_config=pretrained_dataset_config, **common)
2025-09-23 14:42:03 INFO: [load_embeddings] multiplex=False 2025-09-23 14:42:03 INFO: [load_embeddings] experiment_type = NIH 2025-09-23 14:42:03 INFO: [load_embeddings] input_folders = ['batch1'] 2025-09-23 14:42:03 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/pretrained_model
Loading all batches...
2025-09-23 14:42:06 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-09-23 14:42:07 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-09-23 14:42:08 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-09-23 14:42:08 INFO: [load_embeddings] embeddings shape: (115590, 192) 2025-09-23 14:42:08 INFO: [load_embeddings] labels shape: (115590,) 2025-09-23 14:42:08 INFO: [load_embeddings] example label: CLTC_WT_Untreated 2025-09-23 14:42:08 INFO: [load_embeddings] paths shape: (115590,) 2025-09-23 14:42:08 INFO: [load_embeddings] multiplex=False 2025-09-23 14:42:08 INFO: [load_embeddings] experiment_type = NIH 2025-09-23 14:42:08 INFO: [load_embeddings] input_folders = ['batch2'] 2025-09-23 14:42:08 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/pretrained_model 2025-09-23 14:42:11 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-09-23 14:42:11 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-09-23 14:42:12 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-09-23 14:42:12 INFO: [load_embeddings] embeddings shape: (94059, 192) 2025-09-23 14:42:12 INFO: [load_embeddings] labels shape: (94059,) 2025-09-23 14:42:12 INFO: [load_embeddings] example label: DAPI_WT_Untreated 2025-09-23 14:42:12 INFO: [load_embeddings] paths shape: (94059,) 2025-09-23 14:42:12 INFO: [load_embeddings] multiplex=False 2025-09-23 14:42:12 INFO: [load_embeddings] experiment_type = NIH 2025-09-23 14:42:12 INFO: [load_embeddings] input_folders = ['batch3'] 2025-09-23 14:42:12 INFO: [load_embeddings] model_output_folder = /home/projects/hornsteinlab/Collaboration/NOVA/outputs/vit_models/pretrained_model 2025-09-23 14:42:15 INFO: [embeddings_utils._filter] markers_to_exclude = ['CD41'] 2025-09-23 14:42:15 INFO: [embeddings_utils._filter] cell_lines = ['WT'] 2025-09-23 14:42:16 INFO: [embeddings_utils._filter] conditions = ['Untreated'] 2025-09-23 14:42:16 INFO: 
[load_embeddings] embeddings shape: (87130, 192) 2025-09-23 14:42:16 INFO: [load_embeddings] labels shape: (87130,) 2025-09-23 14:42:16 INFO: [load_embeddings] example label: MitoTracker_WT_Untreated 2025-09-23 14:42:16 INFO: [load_embeddings] paths shape: (87130,)
Batches loaded. Training on Batches: [1], Testing on: [2, 3]. === Fold (test=[2, 3]) === Train: (115590, 192) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25] Test: (181189, 192) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25] Coated vesicles: 2439 Nucleus: 30429 Golgi: 2371 PURA granules: 2712 Postsynapse: 2101 Autophagosomes: 2651 Presynapse: 2454 Stress granules: 2842 Peroxisome: 2505 Transport machinery: 2622 NEMO granules: 2935 Mitochondria: 2728 TIA1 granules: 2712 Nucleolus: 2709 Microtubule: 22601 Paraspeckles: 2623 ANXA11 granules: 2614 P-Bodies: 2364 MOM: 2363 FMRP granules: 2913 hnRNP complex: 2728 PML bodies: 2297 ER: 3056 TDP-43 granules: 2535 Actin Cytoskeleton: 2219 Lysosome: 3067
/home/projects/hornsteinlab/galavir/.conda/envs/nova/lib/python3.9/site-packages/sklearn/svm/_classes.py:32: FutureWarning: The default value of `dual` will change from `True` to `'auto'` in 1.5. Set the value of `dual` explicitly to suppress the warning. warnings.warn(
=== Evaluation Metrics ===
Label Accuracy Sensitivity Specificity PPV NPV F1 Correct/Total Accuracy
ANXA11 granules 0.996313 0.926001 0.997890 0.907723 0.998340 0.916770 NaN
Actin Cytoskeleton 0.998256 0.939343 0.999644 0.984175 0.998572 0.961237 NaN
Autophagosomes 0.990756 0.678776 0.996254 0.761530 0.994350 0.717776 NaN
Coated vesicles 0.993940 0.776856 0.999570 0.979086 0.994244 0.866326 NaN
ER 0.998587 0.968488 0.999338 0.973342 0.999214 0.970909 NaN
FMRP granules 0.988040 0.708293 0.994517 0.749419 0.993255 0.728276 NaN
Golgi 0.998306 0.976028 0.998841 0.952960 0.999423 0.964356 NaN
Lysosome 0.998444 0.971634 0.999139 0.966992 0.999264 0.969308 NaN
MOM 0.999018 0.986555 0.999322 0.972578 0.999672 0.979517 NaN
Microtubule 0.994244 0.991829 0.994822 0.978659 0.998037 0.985200 NaN
Mitochondria 0.998377 0.972062 0.999022 0.960529 0.999316 0.966261 NaN
NEMO granules 0.996407 0.961358 0.997295 0.900042 0.999019 0.929690 NaN
Nucleolus 0.999393 0.991187 0.999593 0.983433 0.999785 0.987295 NaN
Nucleus 0.999404 0.999937 0.999215 0.997791 0.999978 0.998863 NaN
P-Bodies 0.991407 0.763507 0.996841 0.852156 0.994375 0.805399 NaN
PML bodies 0.991749 0.727114 0.996872 0.818182 0.994729 0.769965 NaN
PURA granules 0.997583 0.982275 0.997944 0.918477 0.999581 0.949306 NaN
Paraspeckles 0.997351 0.923643 0.998873 0.944243 0.998423 0.933830 NaN
Peroxisome 0.998273 0.946806 0.999413 0.972803 0.998822 0.959629 NaN
Postsynapse 0.990877 0.796265 0.994748 0.751001 0.995942 0.772971 NaN
Presynapse 0.993079 0.945238 0.994098 0.773377 0.998828 0.850714 NaN
Stress granules 0.996275 0.978920 0.996732 0.887480 0.999443 0.930960 NaN
TDP-43 granules 0.996396 0.929178 0.997824 0.900746 0.998494 0.914741 NaN
TIA1 granules 0.995982 0.950048 0.997063 0.883825 0.998823 0.915741 NaN
Transport machinery 0.994851 0.843568 0.998670 0.941235 0.996061 0.889729 NaN
hnRNP complex 0.997494 0.914108 0.999536 0.979708 0.997900 0.945772 NaN
Macro Average 0.995800 0.905731 0.997811 0.911211 0.997842 0.906944 0.9454
Plotting confusion matrix with 26 classes, fig_size=46.8, font_size=23
Training on Batches: [2], Testing on: [1, 3]. === Fold (test=[1, 3]) === Train: (94059, 192) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25] Test: (202720, 192) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25] Nucleus: 24823 Stress granules: 2265 TDP-43 granules: 1934 Nucleolus: 2227 NEMO granules: 2360 Microtubule: 18531 Peroxisome: 2074 PML bodies: 1818 Transport machinery: 2104 FMRP granules: 2608 TIA1 granules: 2086 Paraspeckles: 1916 ER: 2079 Mitochondria: 2236 Coated vesicles: 2536 Presynapse: 1923 P-Bodies: 2319 Golgi: 2110 Lysosome: 2243 hnRNP complex: 2236 Postsynapse: 1631 MOM: 2114 Actin Cytoskeleton: 2019 Autophagosomes: 1654 ANXA11 granules: 2123 PURA granules: 2090
/home/projects/hornsteinlab/galavir/.conda/envs/nova/lib/python3.9/site-packages/sklearn/svm/_classes.py:32: FutureWarning: The default value of `dual` will change from `True` to `'auto'` in 1.5. Set the value of `dual` explicitly to suppress the warning. warnings.warn(
=== Evaluation Metrics ===
Label Accuracy Sensitivity Specificity PPV NPV F1 Correct/Total Accuracy
ANXA11 granules 0.996734 0.925403 0.998341 0.926233 0.998320 0.925818 NaN
Actin Cytoskeleton 0.997978 0.974834 0.998488 0.934225 0.999445 0.954098 NaN
Autophagosomes 0.989873 0.615478 0.997669 0.846077 0.992039 0.712586 NaN
Coated vesicles 0.994707 0.803480 0.999031 0.949394 0.995571 0.870364 NaN
ER 0.998426 0.966592 0.999296 0.974004 0.999088 0.970284 NaN
FMRP granules 0.987446 0.633598 0.995305 0.749866 0.991889 0.686846 NaN
Golgi 0.998259 0.983835 0.998587 0.940716 0.999631 0.961792 NaN
Lysosome 0.997884 0.955798 0.999037 0.964539 0.998789 0.960149 NaN
MOM 0.999048 0.990357 0.999248 0.968081 0.999778 0.979092 NaN
Microtubule 0.995023 0.991067 0.995967 0.983241 0.997863 0.987138 NaN
Mitochondria 0.998668 0.967862 0.999419 0.975957 0.999217 0.971893 NaN
NEMO granules 0.995141 0.978029 0.995578 0.849699 0.999436 0.909359 NaN
Nucleolus 0.999438 0.988319 0.999707 0.987907 0.999717 0.988113 NaN
Nucleus 0.999714 0.999906 0.999646 0.999001 0.999967 0.999453 NaN
P-Bodies 0.991427 0.766002 0.996271 0.815323 0.994978 0.789894 NaN
PML bodies 0.990997 0.763010 0.995493 0.769488 0.995328 0.766235 NaN
PURA granules 0.998446 0.971857 0.999091 0.962825 0.999318 0.967320 NaN
Paraspeckles 0.997879 0.953589 0.998856 0.948386 0.998976 0.950980 NaN
Peroxisome 0.997805 0.942661 0.999017 0.954704 0.998740 0.948644 NaN
Postsynapse 0.990973 0.803946 0.994741 0.754925 0.996044 0.778665 NaN
Presynapse 0.994016 0.913477 0.995766 0.824194 0.998116 0.866542 NaN
Stress granules 0.997208 0.974550 0.997808 0.921643 0.999326 0.947359 NaN
TDP-43 granules 0.997035 0.932052 0.998467 0.930562 0.998503 0.931306 NaN
TIA1 granules 0.995866 0.932359 0.997403 0.896787 0.998361 0.914227 NaN
Transport machinery 0.994860 0.904418 0.997138 0.888363 0.997592 0.896318 NaN
hnRNP complex 0.998461 0.964545 0.999288 0.970582 0.999136 0.967554 NaN
Macro Average 0.995897 0.907578 0.997871 0.911028 0.997891 0.907770 0.946655
Plotting confusion matrix with 26 classes, fig_size=46.8, font_size=23
Training on Batches: [3], Testing on: [1, 2]. === Fold (test=[1, 2]) === Train: (87130, 192) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25] Test: (209649, 192) Labels: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25] Mitochondria: 2095 hnRNP complex: 2095 Nucleus: 22599 Peroxisome: 1855 Microtubule: 16469 Nucleolus: 2085 Actin Cytoskeleton: 2152 MOM: 2200 Golgi: 2145 Autophagosomes: 1484 Coated vesicles: 2044 TDP-43 granules: 1836 TIA1 granules: 2078 NEMO granules: 2117 Lysosome: 2340 PML bodies: 1623 Postsynapse: 1903 P-Bodies: 1901 ER: 2332 Stress granules: 2384 PURA granules: 2085 Transport machinery: 2358 ANXA11 granules: 1850 Presynapse: 1857 FMRP granules: 1492 Paraspeckles: 1751
/home/projects/hornsteinlab/galavir/.conda/envs/nova/lib/python3.9/site-packages/sklearn/svm/_classes.py:32: FutureWarning: The default value of `dual` will change from `True` to `'auto'` in 1.5. Set the value of `dual` explicitly to suppress the warning. warnings.warn(
=== Evaluation Metrics ===
Label Accuracy Sensitivity Specificity PPV NPV F1 Correct/Total Accuracy
ANXA11 granules 0.995936 0.972556 0.996477 0.864515 0.999364 0.915359 NaN
Actin Cytoskeleton 0.997973 0.972393 0.998501 0.930458 0.999430 0.950963 NaN
Autophagosomes 0.981841 0.439257 0.993216 0.575822 0.988302 0.498353 NaN
Coated vesicles 0.997067 0.949749 0.998217 0.928291 0.998778 0.938897 NaN
ER 0.997696 0.969815 0.998396 0.938206 0.999241 0.953749 NaN
FMRP granules 0.974257 0.160840 0.996257 0.537530 0.977726 0.247595 NaN
Golgi 0.998335 0.986387 0.998596 0.938828 0.999702 0.962020 NaN
Lysosome 0.997858 0.953861 0.999002 0.961283 0.998801 0.957557 NaN
MOM 0.999404 0.988162 0.999649 0.983986 0.999742 0.986069 NaN
Microtubule 0.997567 0.991831 0.998967 0.995753 0.998008 0.993788 NaN
Mitochondria 0.998655 0.969984 0.999350 0.973120 0.999272 0.971550 NaN
NEMO granules 0.995755 0.873654 0.998919 0.954405 0.996733 0.912246 NaN
Nucleolus 0.999680 0.992504 0.999853 0.993914 0.999819 0.993208 NaN
Nucleus 0.999919 1.000000 0.999890 0.999692 1.000000 0.999846 NaN
P-Bodies 0.979137 0.849883 0.982090 0.520193 0.996520 0.645371 NaN
PML bodies 0.990961 0.783961 0.995105 0.762287 0.995672 0.772972 NaN
PURA granules 0.998483 0.946481 0.999702 0.986756 0.998747 0.966199 NaN
Paraspeckles 0.997548 0.963648 0.998298 0.926106 0.999195 0.944504 NaN
Peroxisome 0.997176 0.977506 0.997615 0.901511 0.999497 0.937972 NaN
Postsynapse 0.990675 0.709271 0.995775 0.752630 0.994736 0.730308 NaN
Presynapse 0.994224 0.766507 0.999079 0.946670 0.995041 0.847115 NaN
Stress granules 0.997768 0.950460 0.998949 0.957585 0.998763 0.954009 NaN
TDP-43 granules 0.994076 0.905795 0.995999 0.831382 0.997944 0.866995 NaN
TIA1 granules 0.994853 0.843268 0.998404 0.925223 0.996337 0.882347 NaN
Transport machinery 0.984417 0.912399 0.986078 0.601814 0.997955 0.725254 NaN
hnRNP complex 0.996313 0.866035 0.999472 0.975494 0.996760 0.917511 NaN
Macro Average 0.994137 0.872931 0.996994 0.871671 0.997003 0.864298 0.923787
Plotting confusion matrix with 26 classes, fig_size=46.8, font_size=23
=== Evaluation Metrics (from aggregated confusion) ===
Label Accuracy Sensitivity Specificity PPV NPV F1 Correct/Total Accuracy
ANXA11 granules 0.996324 0.942538 0.997545 0.897052 0.998694 0.919233 NaN
Actin Cytoskeleton 0.998061 0.962441 0.998845 0.948269 0.999173 0.955303 NaN
Autophagosomes 0.987305 0.567110 0.995665 0.722412 0.991425 0.635409 NaN
Coated vesicles 0.995306 0.846631 0.998908 0.949433 0.996295 0.895090 NaN
ER 0.998218 0.968260 0.998991 0.961181 0.999181 0.964707 NaN
FMRP granules 0.982969 0.469343 0.995400 0.711753 0.987262 0.565671 NaN
Golgi 0.998300 0.982191 0.998668 0.943941 0.999593 0.962686 NaN
Lysosome 0.998046 0.959869 0.999056 0.964154 0.998938 0.962007 NaN
MOM 0.999164 0.988393 0.999412 0.974815 0.999733 0.981557 NaN
Microtubule 0.995684 0.991571 0.996674 0.986263 0.997967 0.988910 NaN
Mitochondria 0.998575 0.969897 0.999273 0.970171 0.999267 0.970034 NaN
NEMO granules 0.995744 0.935712 0.997282 0.898148 0.998352 0.916546 NaN
Nucleolus 0.999510 0.990671 0.999724 0.988629 0.999774 0.989649 NaN
Nucleus 0.999692 0.999949 0.999600 0.998877 0.999982 0.999413 NaN
P-Bodies 0.987080 0.795033 0.991437 0.678088 0.995331 0.731919 NaN
PML bodies 0.991214 0.759759 0.995777 0.780084 0.995266 0.769788 NaN
PURA granules 0.998196 0.966168 0.998957 0.956515 0.999196 0.961318 NaN
Paraspeckles 0.997601 0.948490 0.998664 0.938936 0.998884 0.943689 NaN
Peroxisome 0.997726 0.956326 0.998643 0.939820 0.999032 0.948001 NaN
Postsynapse 0.990838 0.770186 0.995109 0.752949 0.995550 0.761470 NaN
Presynapse 0.993803 0.871511 0.996427 0.839592 0.997241 0.855254 NaN
Stress granules 0.997121 0.967695 0.997883 0.922089 0.999162 0.944341 NaN
TDP-43 granules 0.995795 0.921887 0.997399 0.884973 0.998303 0.903053 NaN
TIA1 granules 0.995544 0.906632 0.997653 0.901584 0.997785 0.904101 NaN
Transport machinery 0.991169 0.887916 0.993693 0.774917 0.997249 0.827577 NaN
hnRNP complex 0.997407 0.914435 0.999429 0.975002 0.997918 0.943748 NaN
Macro Average 0.995246 0.893870 0.997543 0.894602 0.997560 0.892326 0.938195
=== Average Metrics Across Folds ===
Label Accuracy Sensitivity Specificity PPV NPV F1 Correct/Total Accuracy
ANXA11 granules 0.996328 0.941320 0.997569 0.899490 0.998675 0.919316 NaN
Actin Cytoskeleton 0.998069 0.962190 0.998877 0.949619 0.999149 0.955433 NaN
Autophagosomes 0.987490 0.577837 0.995713 0.727810 0.991563 0.642905 NaN
Coated vesicles 0.995238 0.843361 0.998939 0.952257 0.996198 0.891862 NaN
ER 0.998237 0.968298 0.999010 0.961851 0.999181 0.964981 NaN
FMRP granules 0.983248 0.500910 0.995360 0.678938 0.987623 0.554239 NaN
Golgi 0.998300 0.982083 0.998675 0.944168 0.999586 0.962723 NaN
Lysosome 0.998062 0.960431 0.999059 0.964271 0.998951 0.962338 NaN
MOM 0.999156 0.988358 0.999406 0.974881 0.999730 0.981559 NaN
Microtubule 0.995611 0.991576 0.996585 0.985884 0.997970 0.988709 NaN
Mitochondria 0.998567 0.969969 0.999264 0.969869 0.999268 0.969901 NaN
NEMO granules 0.995768 0.937680 0.997264 0.901382 0.998396 0.917098 NaN
Nucleolus 0.999504 0.990670 0.999718 0.988418 0.999774 0.989539 NaN
Nucleus 0.999679 0.999947 0.999584 0.998828 0.999981 0.999387 NaN
P-Bodies 0.987323 0.793131 0.991734 0.729224 0.995291 0.746888 NaN
PML bodies 0.991236 0.758029 0.995823 0.783319 0.995243 0.769724 NaN
PURA granules 0.998171 0.966871 0.998912 0.956020 0.999215 0.960942 NaN
Paraspeckles 0.997593 0.946960 0.998676 0.939578 0.998865 0.943105 NaN
Peroxisome 0.997751 0.955657 0.998682 0.943006 0.999019 0.948748 NaN
Postsynapse 0.990842 0.769827 0.995088 0.752852 0.995574 0.760648 NaN
Presynapse 0.993773 0.875074 0.996315 0.848080 0.997328 0.854791 NaN
Stress granules 0.997083 0.967977 0.997829 0.922236 0.999177 0.944109 NaN
TDP-43 granules 0.995836 0.922342 0.997430 0.887563 0.998314 0.904347 NaN
TIA1 granules 0.995567 0.908558 0.997623 0.901945 0.997840 0.904105 NaN
Transport machinery 0.991376 0.886795 0.993962 0.810471 0.997203 0.837101 NaN
hnRNP complex 0.997423 0.914896 0.999432 0.975261 0.997932 0.943612 NaN
Macro Average 0.995278 0.895413 0.997559 0.897970 0.997579 0.893004 0.938614
Plotting confusion matrix with 26 classes, fig_size=46.8, font_size=23
{'Accuracy': 0.9952779985005705,
'Sensitivity': 0.8954134727257955,
'Specificity': 0.9975588468110604,
'PPV': 0.8979701162038985,
'NPV': 0.9975787633190297,
'F1': 0.8930041853910872,
'Correct/Total Accuracy': 0.9386139805074171}